{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df=pd.read_csv('C:/Users/dhava/Desktop/Janvi/KDD/Hotel_Reviews.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Hotel_Address</th>\n",
       "      <th>Additional_Number_of_Scoring</th>\n",
       "      <th>Review_Date</th>\n",
       "      <th>Average_Score</th>\n",
       "      <th>Hotel_Name</th>\n",
       "      <th>Reviewer_Nationality</th>\n",
       "      <th>Negative_Review</th>\n",
       "      <th>Review_Total_Negative_Word_Counts</th>\n",
       "      <th>Total_Number_of_Reviews</th>\n",
       "      <th>Positive_Review</th>\n",
       "      <th>Review_Total_Positive_Word_Counts</th>\n",
       "      <th>Total_Number_of_Reviews_Reviewer_Has_Given</th>\n",
       "      <th>Reviewer_Score</th>\n",
       "      <th>Tags</th>\n",
       "      <th>days_since_review</th>\n",
       "      <th>lat</th>\n",
       "      <th>lng</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>s Gravesandestraat 55 Oost 1092 AA Amsterdam ...</td>\n",
       "      <td>194</td>\n",
       "      <td>8/3/2017</td>\n",
       "      <td>7.7</td>\n",
       "      <td>Hotel Arena</td>\n",
       "      <td>Russia</td>\n",
       "      <td>I am so angry that i made this post available...</td>\n",
       "      <td>397</td>\n",
       "      <td>1403</td>\n",
       "      <td>Only the park outside of the hotel was beauti...</td>\n",
       "      <td>11</td>\n",
       "      <td>7</td>\n",
       "      <td>2.9</td>\n",
       "      <td>[' Leisure trip ', ' Couple ', ' Duplex Double...</td>\n",
       "      <td>0 days</td>\n",
       "      <td>52.360576</td>\n",
       "      <td>4.915968</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>s Gravesandestraat 55 Oost 1092 AA Amsterdam ...</td>\n",
       "      <td>194</td>\n",
       "      <td>8/3/2017</td>\n",
       "      <td>7.7</td>\n",
       "      <td>Hotel Arena</td>\n",
       "      <td>Ireland</td>\n",
       "      <td>No Negative</td>\n",
       "      <td>0</td>\n",
       "      <td>1403</td>\n",
       "      <td>No real complaints the hotel was great great ...</td>\n",
       "      <td>105</td>\n",
       "      <td>7</td>\n",
       "      <td>7.5</td>\n",
       "      <td>[' Leisure trip ', ' Couple ', ' Duplex Double...</td>\n",
       "      <td>0 days</td>\n",
       "      <td>52.360576</td>\n",
       "      <td>4.915968</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>s Gravesandestraat 55 Oost 1092 AA Amsterdam ...</td>\n",
       "      <td>194</td>\n",
       "      <td>7/31/2017</td>\n",
       "      <td>7.7</td>\n",
       "      <td>Hotel Arena</td>\n",
       "      <td>Australia</td>\n",
       "      <td>Rooms are nice but for elderly a bit difficul...</td>\n",
       "      <td>42</td>\n",
       "      <td>1403</td>\n",
       "      <td>Location was good and staff were ok It is cut...</td>\n",
       "      <td>21</td>\n",
       "      <td>9</td>\n",
       "      <td>7.1</td>\n",
       "      <td>[' Leisure trip ', ' Family with young childre...</td>\n",
       "      <td>3 days</td>\n",
       "      <td>52.360576</td>\n",
       "      <td>4.915968</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>s Gravesandestraat 55 Oost 1092 AA Amsterdam ...</td>\n",
       "      <td>194</td>\n",
       "      <td>7/31/2017</td>\n",
       "      <td>7.7</td>\n",
       "      <td>Hotel Arena</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>My room was dirty and I was afraid to walk ba...</td>\n",
       "      <td>210</td>\n",
       "      <td>1403</td>\n",
       "      <td>Great location in nice surroundings the bar a...</td>\n",
       "      <td>26</td>\n",
       "      <td>1</td>\n",
       "      <td>3.8</td>\n",
       "      <td>[' Leisure trip ', ' Solo traveler ', ' Duplex...</td>\n",
       "      <td>3 days</td>\n",
       "      <td>52.360576</td>\n",
       "      <td>4.915968</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>s Gravesandestraat 55 Oost 1092 AA Amsterdam ...</td>\n",
       "      <td>194</td>\n",
       "      <td>7/24/2017</td>\n",
       "      <td>7.7</td>\n",
       "      <td>Hotel Arena</td>\n",
       "      <td>New Zealand</td>\n",
       "      <td>You When I booked with your company on line y...</td>\n",
       "      <td>140</td>\n",
       "      <td>1403</td>\n",
       "      <td>Amazing location and building Romantic setting</td>\n",
       "      <td>8</td>\n",
       "      <td>3</td>\n",
       "      <td>6.7</td>\n",
       "      <td>[' Leisure trip ', ' Couple ', ' Suite ', ' St...</td>\n",
       "      <td>10 days</td>\n",
       "      <td>52.360576</td>\n",
       "      <td>4.915968</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                       Hotel_Address  \\\n",
       "0   s Gravesandestraat 55 Oost 1092 AA Amsterdam ...   \n",
       "1   s Gravesandestraat 55 Oost 1092 AA Amsterdam ...   \n",
       "2   s Gravesandestraat 55 Oost 1092 AA Amsterdam ...   \n",
       "3   s Gravesandestraat 55 Oost 1092 AA Amsterdam ...   \n",
       "4   s Gravesandestraat 55 Oost 1092 AA Amsterdam ...   \n",
       "\n",
       "   Additional_Number_of_Scoring Review_Date  Average_Score   Hotel_Name  \\\n",
       "0                           194    8/3/2017            7.7  Hotel Arena   \n",
       "1                           194    8/3/2017            7.7  Hotel Arena   \n",
       "2                           194   7/31/2017            7.7  Hotel Arena   \n",
       "3                           194   7/31/2017            7.7  Hotel Arena   \n",
       "4                           194   7/24/2017            7.7  Hotel Arena   \n",
       "\n",
       "  Reviewer_Nationality                                    Negative_Review  \\\n",
       "0              Russia    I am so angry that i made this post available...   \n",
       "1             Ireland                                         No Negative   \n",
       "2           Australia    Rooms are nice but for elderly a bit difficul...   \n",
       "3      United Kingdom    My room was dirty and I was afraid to walk ba...   \n",
       "4         New Zealand    You When I booked with your company on line y...   \n",
       "\n",
       "   Review_Total_Negative_Word_Counts  Total_Number_of_Reviews  \\\n",
       "0                                397                     1403   \n",
       "1                                  0                     1403   \n",
       "2                                 42                     1403   \n",
       "3                                210                     1403   \n",
       "4                                140                     1403   \n",
       "\n",
       "                                     Positive_Review  \\\n",
       "0   Only the park outside of the hotel was beauti...   \n",
       "1   No real complaints the hotel was great great ...   \n",
       "2   Location was good and staff were ok It is cut...   \n",
       "3   Great location in nice surroundings the bar a...   \n",
       "4    Amazing location and building Romantic setting    \n",
       "\n",
       "   Review_Total_Positive_Word_Counts  \\\n",
       "0                                 11   \n",
       "1                                105   \n",
       "2                                 21   \n",
       "3                                 26   \n",
       "4                                  8   \n",
       "\n",
       "   Total_Number_of_Reviews_Reviewer_Has_Given  Reviewer_Score  \\\n",
       "0                                           7             2.9   \n",
       "1                                           7             7.5   \n",
       "2                                           9             7.1   \n",
       "3                                           1             3.8   \n",
       "4                                           3             6.7   \n",
       "\n",
       "                                                Tags days_since_review  \\\n",
       "0  [' Leisure trip ', ' Couple ', ' Duplex Double...            0 days   \n",
       "1  [' Leisure trip ', ' Couple ', ' Duplex Double...            0 days   \n",
       "2  [' Leisure trip ', ' Family with young childre...            3 days   \n",
       "3  [' Leisure trip ', ' Solo traveler ', ' Duplex...            3 days   \n",
       "4  [' Leisure trip ', ' Couple ', ' Suite ', ' St...           10 days   \n",
       "\n",
       "         lat       lng  \n",
       "0  52.360576  4.915968  \n",
       "1  52.360576  4.915968  \n",
       "2  52.360576  4.915968  \n",
       "3  52.360576  4.915968  \n",
       "4  52.360576  4.915968  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0     I am so angry that i made this post available...\n",
       "1                                          No Negative\n",
       "2     Rooms are nice but for elderly a bit difficul...\n",
       "3     My room was dirty and I was afraid to walk ba...\n",
       "4     You When I booked with your company on line y...\n",
       "Name: Negative_Review, dtype: object"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1=df['Negative_Review']\n",
    "df1.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dtype('O')"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1.dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Hotel_Address                                  object\n",
       "Additional_Number_of_Scoring                    int64\n",
       "Review_Date                                    object\n",
       "Average_Score                                 float64\n",
       "Hotel_Name                                     object\n",
       "Reviewer_Nationality                           object\n",
       "Negative_Review                                object\n",
       "Review_Total_Negative_Word_Counts               int64\n",
       "Total_Number_of_Reviews                         int64\n",
       "Positive_Review                                object\n",
       "Review_Total_Positive_Word_Counts               int64\n",
       "Total_Number_of_Reviews_Reviewer_Has_Given      int64\n",
       "Reviewer_Score                                float64\n",
       "Tags                                           object\n",
       "days_since_review                              object\n",
       "lat                                           float64\n",
       "lng                                           float64\n",
       "dtype: object"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df1=df1.astype('str')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df1.head()\n",
    "df1.to_csv('negative.csv',index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Hotel_Address</th>\n",
       "      <th>Additional_Number_of_Scoring</th>\n",
       "      <th>Review_Date</th>\n",
       "      <th>Average_Score</th>\n",
       "      <th>Hotel_Name</th>\n",
       "      <th>Reviewer_Nationality</th>\n",
       "      <th>Negative_Review</th>\n",
       "      <th>Review_Total_Negative_Word_Counts</th>\n",
       "      <th>Total_Number_of_Reviews</th>\n",
       "      <th>Positive_Review</th>\n",
       "      <th>Review_Total_Positive_Word_Counts</th>\n",
       "      <th>Total_Number_of_Reviews_Reviewer_Has_Given</th>\n",
       "      <th>Reviewer_Score</th>\n",
       "      <th>Tags</th>\n",
       "      <th>days_since_review</th>\n",
       "      <th>lat</th>\n",
       "      <th>lng</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>s Gravesandestraat 55 Oost 1092 AA Amsterdam ...</td>\n",
       "      <td>194</td>\n",
       "      <td>8/3/2017</td>\n",
       "      <td>7.7</td>\n",
       "      <td>Hotel Arena</td>\n",
       "      <td>Russia</td>\n",
       "      <td>I am so angry that i made this post available...</td>\n",
       "      <td>397</td>\n",
       "      <td>1403</td>\n",
       "      <td>Only the park outside of the hotel was beauti...</td>\n",
       "      <td>11</td>\n",
       "      <td>7</td>\n",
       "      <td>2.9</td>\n",
       "      <td>[' Leisure trip ', ' Couple ', ' Duplex Double...</td>\n",
       "      <td>0 days</td>\n",
       "      <td>52.360576</td>\n",
       "      <td>4.915968</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>s Gravesandestraat 55 Oost 1092 AA Amsterdam ...</td>\n",
       "      <td>194</td>\n",
       "      <td>8/3/2017</td>\n",
       "      <td>7.7</td>\n",
       "      <td>Hotel Arena</td>\n",
       "      <td>Ireland</td>\n",
       "      <td>No Negative</td>\n",
       "      <td>0</td>\n",
       "      <td>1403</td>\n",
       "      <td>No real complaints the hotel was great great ...</td>\n",
       "      <td>105</td>\n",
       "      <td>7</td>\n",
       "      <td>7.5</td>\n",
       "      <td>[' Leisure trip ', ' Couple ', ' Duplex Double...</td>\n",
       "      <td>0 days</td>\n",
       "      <td>52.360576</td>\n",
       "      <td>4.915968</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>s Gravesandestraat 55 Oost 1092 AA Amsterdam ...</td>\n",
       "      <td>194</td>\n",
       "      <td>7/31/2017</td>\n",
       "      <td>7.7</td>\n",
       "      <td>Hotel Arena</td>\n",
       "      <td>Australia</td>\n",
       "      <td>Rooms are nice but for elderly a bit difficul...</td>\n",
       "      <td>42</td>\n",
       "      <td>1403</td>\n",
       "      <td>Location was good and staff were ok It is cut...</td>\n",
       "      <td>21</td>\n",
       "      <td>9</td>\n",
       "      <td>7.1</td>\n",
       "      <td>[' Leisure trip ', ' Family with young childre...</td>\n",
       "      <td>3 days</td>\n",
       "      <td>52.360576</td>\n",
       "      <td>4.915968</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>s Gravesandestraat 55 Oost 1092 AA Amsterdam ...</td>\n",
       "      <td>194</td>\n",
       "      <td>7/31/2017</td>\n",
       "      <td>7.7</td>\n",
       "      <td>Hotel Arena</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>My room was dirty and I was afraid to walk ba...</td>\n",
       "      <td>210</td>\n",
       "      <td>1403</td>\n",
       "      <td>Great location in nice surroundings the bar a...</td>\n",
       "      <td>26</td>\n",
       "      <td>1</td>\n",
       "      <td>3.8</td>\n",
       "      <td>[' Leisure trip ', ' Solo traveler ', ' Duplex...</td>\n",
       "      <td>3 days</td>\n",
       "      <td>52.360576</td>\n",
       "      <td>4.915968</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>s Gravesandestraat 55 Oost 1092 AA Amsterdam ...</td>\n",
       "      <td>194</td>\n",
       "      <td>7/24/2017</td>\n",
       "      <td>7.7</td>\n",
       "      <td>Hotel Arena</td>\n",
       "      <td>New Zealand</td>\n",
       "      <td>You When I booked with your company on line y...</td>\n",
       "      <td>140</td>\n",
       "      <td>1403</td>\n",
       "      <td>Amazing location and building Romantic setting</td>\n",
       "      <td>8</td>\n",
       "      <td>3</td>\n",
       "      <td>6.7</td>\n",
       "      <td>[' Leisure trip ', ' Couple ', ' Suite ', ' St...</td>\n",
       "      <td>10 days</td>\n",
       "      <td>52.360576</td>\n",
       "      <td>4.915968</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                       Hotel_Address  \\\n",
       "0   s Gravesandestraat 55 Oost 1092 AA Amsterdam ...   \n",
       "1   s Gravesandestraat 55 Oost 1092 AA Amsterdam ...   \n",
       "2   s Gravesandestraat 55 Oost 1092 AA Amsterdam ...   \n",
       "3   s Gravesandestraat 55 Oost 1092 AA Amsterdam ...   \n",
       "4   s Gravesandestraat 55 Oost 1092 AA Amsterdam ...   \n",
       "\n",
       "   Additional_Number_of_Scoring Review_Date  Average_Score   Hotel_Name  \\\n",
       "0                           194    8/3/2017            7.7  Hotel Arena   \n",
       "1                           194    8/3/2017            7.7  Hotel Arena   \n",
       "2                           194   7/31/2017            7.7  Hotel Arena   \n",
       "3                           194   7/31/2017            7.7  Hotel Arena   \n",
       "4                           194   7/24/2017            7.7  Hotel Arena   \n",
       "\n",
       "  Reviewer_Nationality                                    Negative_Review  \\\n",
       "0              Russia    I am so angry that i made this post available...   \n",
       "1             Ireland                                         No Negative   \n",
       "2           Australia    Rooms are nice but for elderly a bit difficul...   \n",
       "3      United Kingdom    My room was dirty and I was afraid to walk ba...   \n",
       "4         New Zealand    You When I booked with your company on line y...   \n",
       "\n",
       "   Review_Total_Negative_Word_Counts  Total_Number_of_Reviews  \\\n",
       "0                                397                     1403   \n",
       "1                                  0                     1403   \n",
       "2                                 42                     1403   \n",
       "3                                210                     1403   \n",
       "4                                140                     1403   \n",
       "\n",
       "                                     Positive_Review  \\\n",
       "0   Only the park outside of the hotel was beauti...   \n",
       "1   No real complaints the hotel was great great ...   \n",
       "2   Location was good and staff were ok It is cut...   \n",
       "3   Great location in nice surroundings the bar a...   \n",
       "4    Amazing location and building Romantic setting    \n",
       "\n",
       "   Review_Total_Positive_Word_Counts  \\\n",
       "0                                 11   \n",
       "1                                105   \n",
       "2                                 21   \n",
       "3                                 26   \n",
       "4                                  8   \n",
       "\n",
       "   Total_Number_of_Reviews_Reviewer_Has_Given  Reviewer_Score  \\\n",
       "0                                           7             2.9   \n",
       "1                                           7             7.5   \n",
       "2                                           9             7.1   \n",
       "3                                           1             3.8   \n",
       "4                                           3             6.7   \n",
       "\n",
       "                                                Tags days_since_review  \\\n",
       "0  [' Leisure trip ', ' Couple ', ' Duplex Double...            0 days   \n",
       "1  [' Leisure trip ', ' Couple ', ' Duplex Double...            0 days   \n",
       "2  [' Leisure trip ', ' Family with young childre...            3 days   \n",
       "3  [' Leisure trip ', ' Solo traveler ', ' Duplex...            3 days   \n",
       "4  [' Leisure trip ', ' Couple ', ' Suite ', ' St...           10 days   \n",
       "\n",
       "         lat       lng  \n",
       "0  52.360576  4.915968  \n",
       "1  52.360576  4.915968  \n",
       "2  52.360576  4.915968  \n",
       "3  52.360576  4.915968  \n",
       "4  52.360576  4.915968  "
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df2=df.head(100000)\n",
    "df2.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Hotel_Address                                 100000\n",
       "Additional_Number_of_Scoring                  100000\n",
       "Review_Date                                   100000\n",
       "Average_Score                                 100000\n",
       "Hotel_Name                                    100000\n",
       "Reviewer_Nationality                          100000\n",
       "Negative_Review                               100000\n",
       "Review_Total_Negative_Word_Counts             100000\n",
       "Total_Number_of_Reviews                       100000\n",
       "Positive_Review                               100000\n",
       "Review_Total_Positive_Word_Counts             100000\n",
       "Total_Number_of_Reviews_Reviewer_Has_Given    100000\n",
       "Reviewer_Score                                100000\n",
       "Tags                                          100000\n",
       "days_since_review                             100000\n",
       "lat                                            99963\n",
       "lng                                            99963\n",
       "dtype: int64"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df2.count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda root]",
   "language": "python",
   "name": "conda-root-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
